### Weights for 2022 pre-match

##excluyendo NAs': que no responde confianza, y que no vota, y que no tiene interés en política

pb2022 <- pb2022 %>% filter(!is.na(prs_trst_w02),!is.na(prs_trst_w01),!is.na(delta_elec_winner),!is.na(pol_interesF_w01))

pb2022 <- pb2022 %>% mutate(educ_r = factor(case_when(
  educ_w01 >= 1 & educ_w01 <= 5 | educ_w01 == 99 ~ 1,
  educ_w01 == 6  ~ 2,
  educ_w01 == 7  ~ 3,
  educ_w01 == 8  ~ 4,
  educ_w01 == 9 | educ_w01 == 10 ~ 5
), labels = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
              "Profesional Incompleto","Profesional y Postgrado")),
edad_r = factor(case_when(
  attribute_235_w01 %in% c(18:29) ~ 1,
  attribute_235_w01 %in% c(30:39) ~ 2,
  attribute_235_w01 %in% c(40:49) ~ 3,
  attribute_235_w01 %in% c(50:100) ~ 4
),
labels = c("18 a 29","30 a 39","40 a 49", "50 a 65")
))

pb2022$educaFr_w03 <- factor(car::recode(pb2022$educ_w01, "1:5=1; 6:7=2; 8:10=3; 99=1"),
                         labels=c("Secondary Ed.", "Tertiary Tech.", "Tertiary Univ"))

pb2022$sec <- ifelse(pb2022$educaFr_w03 == "Secondary Ed.",1,0)
pb2022$ter_tec <- ifelse(pb2022$educaFr_w03 == "Tertiary Tech.",1,0)
pb2022$ter_univ <- ifelse(pb2022$educaFr_w03 == "Tertiary Univ",1,0)

edad.dist <- data.frame(edad_r = c("18 a 29","30 a 39","40 a 49", "50 a 65"),
                        Freq = nrow(pb2022) * c(0.29, 0.21, 0.19, 0.31)) # En base a CASEN 2020
educ.dist <- data.frame(educ_r = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
                                   "Profesional Incompleto","Profesional y Postgrado"),
                        Freq = nrow(pb2022) * c(0.58,0.04,0.09,0.11,0.18)) # En base a CASEN 2020


k <- NULL
l <- list()

for (k in 2:20){
  # The weight is created
  
  svy.pb2022 <- svydesign(ids = ~1, data = pb2022)
  svy.pb2022.rake <- rake(design = svy.pb2022, sample.margins = list(~educ_r,~edad_r),
                      population.margins = list(educ.dist,edad.dist))
  
  # A df with the values of education is created
  
  df <- as.data.frame(prop.table(table(svy.pb2022.rake$variables["educaFr_w03"])))
  colnames(df)[2] <- "Unweighted Mean"
  df <- merge(df,as.data.frame(prop.table(svytable(~educaFr_w03,design=svy.pb2022.rake))),by="educaFr_w03")
  colnames(df)[3] <- "Pre-Trim Mean"
  df <- cbind(df,Population = c(.61,.11,.25))
  df$mediana <- median(weights(svy.pb2022.rake))
  df$iqr <- IQR(weights(svy.pb2022.rake))
  max_pre_trim <- max(weights(svy.pb2022.rake))
  
  #The weight is trimmed
  
  svy.pb2022.rake <- trimWeights(svy.pb2022.rake, lower=1/(median(weights(svy.pb2022.rake))+k*IQR(weights(svy.pb2022.rake))), 
                             upper=median(weights(svy.pb2022.rake))+k*IQR(weights(svy.pb2022.rake)))
  df <- cbind(df,as.data.frame(prop.table(svytable(~educaFr_w03,design=svy.pb2022.rake)))[2])
  colnames(df)[which(colnames(df)=="Freq")] <- "Weighted Mean" # This is the trimmed weight
  
  df$Variance <- 0
  df$Variance[1] <- svyvar(~sec,design=svy.pb2022.rake,na.rm=T)
  df$Variance[2] <- svyvar(~ter_tec,design=svy.pb2022.rake,na.rm=T)
  df$Variance[3] <- svyvar(~ter_univ,design=svy.pb2022.rake,na.rm=T)
  
  df$mse <- (df$`Weighted Mean`- df$Population)^2+df$Variance
  df$max_pre_trim <- max_pre_trim
  df$max_post_trim <- max(weights(svy.pb2022.rake))
  
  # A variable that informs the relative bias and the relative variance is created (Potter & Zheng, 2015, pg.2713)
  df$rel_bias <- 100*(df$Population-df$`Weighted Mean`)/df$Population
  df$rel_var <- 100*(df$`Weighted Mean`-df$`Pre-Trim Mean`)/df$`Pre-Trim Mean`
  
  df$k <- k
  
  # df is stored in a list  
  l[[k]] <- df
}

library(data.table)
library(gridExtra)

#df2 <- data.table(m=names(l),dplyr::bind_rows(l))
df2 <- do.call("rbind", l)
df2 <- df2 %>% group_by(educaFr_w03) %>% mutate(delta_mse = mse-lag(mse))

mse_plot22 <- ggplot(data=filter(df2,educaFr_w03!="Tertiary Tech."),aes(k, mse, color=educaFr_w03)) + 
  geom_line() + geom_point() + labs(x="K",y="MSE",color="Educational Level",
                                    title="A) MSE Reduction by K") + theme_bw()

delta_plot22 <- ggplot(data=filter(df2,educaFr_w03!="Tertiary Tech."), aes(k, delta_mse, color=educaFr_w03)) + 
  geom_line() + geom_point() + labs(x="K",y="Delta MSE",color="Educational Level",
                                    title="B) MSE Change by each additional K") + theme_bw()


ggsave(file=paste0(ruta,"/Articulos/Electoral Winners y Plebiscito/Resultados/MSE Pb2022.png"),
       gridExtra::grid.arrange(mse_plot22,delta_plot22),dpi=1000)

#Weight trimming
k <- 9
svy.pb2022.rake <- trimWeights(svy.pb2022.rake, lower=1/(median(weights(svy.pb2022.rake)+k*IQR(weights(svy.pb2022.rake)))), 
                               upper=median(weights(svy.pb2022.rake))+k*IQR(weights(svy.pb2022.rake)))

pb2022 <- as_data_frame_with_weights(svy.pb2022.rake,full_wgt_name = "weight.educ.edad.pre_match",
                                    rep_wgt_prefix = "REP_WGT_")

pb2022 <- pb2022 %>% dplyr::select(-educ_r,-edad_r)

summary(pb2022$weight.educ.edad.pre_match)


